/**
 * xmlpure.js
 *
 * Building upon and improving the CodeMirror 2 XML parser
 * @author: Dror BG (deebug.dev@gmail.com)
 * @date: August, 2011
 */

CodeMirror.defineMode("xmlpure", function (config, parserConfig) {
  // constants

  // style names returned to the editor for each kind of token
  // (processing instructions deliberately share the comment style)
  var STYLE_ERROR = "error";
  var STYLE_INSTRUCTION = "comment";
  var STYLE_COMMENT = "comment";
  var STYLE_ELEMENT_NAME = "tag";
  var STYLE_ATTRIBUTE = "attribute";
  var STYLE_WORD = "string";
  var STYLE_TEXT = "atom";

  // pseudo tag names for contexts that are not real elements;
  // the leading "!" keeps them apart from anything the tag-name regex accepts
  var TAG_INSTRUCTION = "!instruction";
  var TAG_CDATA = "!cdata";
  var TAG_COMMENT = "!comment";
  var TAG_TEXT = "!text";

  // contexts that pushContext() marks as noIndent, which makes indent()
  // leave their lines alone
  var doNotIndent = {};
  doNotIndent[TAG_CDATA] = true;
  doNotIndent[TAG_COMMENT] = true;
  doNotIndent[TAG_TEXT] = true;
  doNotIndent[TAG_INSTRUCTION] = true;

  // options
  // width of one indentation level, read from the `config` argument that is
  // handed to this mode factory; used by pushContext() and indent()
  var indentUnit = config.indentUnit;

  ///////////////////////////////////////////////////////////////////////////
  // helper functions

  // hand control over to another parser: `parser` becomes the active
  // tokenizer first, and is then asked for the token at the current position
  // returns whatever style `parser` returns
  function chain(stream, state, parser) {
    var style;
    state.tokenize = parser;
    style = parser(stream, state);
    return style;
  }

  // build a tokenizer for a block that runs until `terminator`
  // (used for comments and CDATA sections)
  // - style: the style returned for every piece of the block
  // - terminator: the string that closes the block
  // - nextTokenize: the tokenizer installed once the terminator was consumed
  // the block's context is popped when the terminator is found; if the line
  // ends first, the same tokenizer simply carries on with the next line
  function inBlock(style, terminator, nextTokenize) {
    return function (stream, state) {
      var closed = false;
      while (!closed && !stream.eol()) {
        if (stream.match(terminator)) {
          popContext(state);
          state.tokenize = nextTokenize;
          closed = true;
        } else {
          stream.next();
        }
      }
      return style;
    };
  }

  // go down a level in the document by pushing a new context
  // contexts are pushed for elements (tagName is the element name) and for
  // processing instructions, comments, CDATA sections and text (tagName is
  // one of the "!..." pseudo names)
  // - state: the parser state; state.context becomes the new context
  // - tagName: the name stored on the new context
  // the new context is flagged noIndent when its own tagName is listed in
  // doNotIndent, or when the enclosing context is already flagged
  function pushContext(state, tagName) {
    var parent = state.context;
    // the inherited flag lives in `noIndent`; reading any other property
    // name here would silently never inherit
    var inherited = Boolean(parent && parent.noIndent);
    var noIndent = doNotIndent.hasOwnProperty(tagName) || inherited;
    state.context = {
      tagName:tagName,
      prev:parent,
      indent:parent ? parent.indent + indentUnit : 0,
      lineNumber:state.lineNumber,
      indented:state.indented,
      noIndent:noIndent
    };
  }

  // go up a level in the document
  // returns the context that was left, or null when there was no context
  // to leave (which should not happen in a well-formed document)
  function popContext(state) {
    var leaving = state.context;
    if (!leaving) {
      return null;
    }
    state.context = leaving.prev;
    return leaving;
  }

  // tell whether the current token is separated from what precedes it:
  // true when the token begins the line, or when the character right before
  // the token's start is a space or a tab; false otherwise
  function isTokenSeparated(stream) {
    if (stream.sol()) {
      return true;
    }
    var before = stream.string.charAt(stream.start - 1);
    return before == " " || before == "\t";
  }

  ///////////////////////////////////////////////////////////////////////////
  // context: document
  //
  // an XML document can contain:
  // - a single declaration (if defined, it must be the very first line)
  // - exactly one root element
  // @todo try to actually limit the number of root elements to 1
  // - zero or more comments
  // tokenizer for the document level (outside of any element)
  // accepts "<?" (processing instruction), "<!--" (comment) and "<" followed
  // by a tag; everything else marks the rest of the line as an error
  function parseDocument(stream, state) {
    if (!stream.eat("<")) {
      // anything but markup is an error at this level
      stream.skipToEnd();
      return STYLE_ERROR;
    }

    if (stream.eat("?")) {
      // new context: processing instruction
      pushContext(state, TAG_INSTRUCTION);
      state.tokenize = parseProcessingInstructionStartTag;
      return STYLE_INSTRUCTION;
    }

    if (stream.match("!--")) {
      // new context: comment - come back here once it is closed
      pushContext(state, TAG_COMMENT);
      return chain(stream, state, inBlock(STYLE_COMMENT, "-->", parseDocument));
    }

    if (stream.eatSpace() || stream.eol()) {
      // a lone "<" (followed by white space or by nothing) is an error
      stream.skipToEnd();
      return STYLE_ERROR;
    }

    // element - the tag name is read by the next tokenizer
    state.tokenize = parseElementTagName;
    return STYLE_ELEMENT_NAME;
  }

  ///////////////////////////////////////////////////////////////////////////
  // context: XML element start-tag or end-tag
  //
  // - element start-tag can contain attributes
  // - element start-tag may self-close (or start an element block if it doesn't)
  // - element end-tag can contain only the tag name
  // tokenizer for what follows the "<" of an element tag
  // - a name: this is a start-tag; a context is pushed for the element and
  //   the rest of the tag is left to parseElement
  // - "/" name ">": this is an end-tag; the current context is popped and the
  //   token is an error when its name differs from the popped context's name
  //   (or when there was nothing to pop)
  // - anything else: error; everything up to and including the next ">" on
  //   the line is consumed
  // every branch returns, so nothing may follow the final error branch
  function parseElementTagName(stream, state) {
    var startPos = stream.pos;

    if (stream.match(/^[a-zA-Z_:][-a-zA-Z0-9_:.]*/)) {
      // element start-tag
      var tagName = stream.string.substring(startPos, stream.pos);
      pushContext(state, tagName);
      state.tokenize = parseElement;
      return STYLE_ELEMENT_NAME;
    }

    if (stream.match(/^\/[a-zA-Z_:][-a-zA-Z0-9_:.]*( )*>/)) {
      // element end-tag: strip the leading "/" and the trailing ">",
      // then any spaces that sat between the name and the ">"
      var endTagName = stream.string.substring(startPos + 1, stream.pos - 1).trim();
      var oldContext = popContext(state);
      state.tokenize = state.context == null ? parseDocument : parseElementBlock;
      if (oldContext == null || endTagName != oldContext.tagName) {
        // the start and end tag names should match - error
        return STYLE_ERROR;
      }
      return STYLE_ELEMENT_NAME;
    }

    // no tag name - error
    state.tokenize = state.context == null ? parseDocument : parseElementBlock;
    stream.eatWhile(/[^>]/);
    stream.eat(">");
    return STYLE_ERROR;
  }

  // tokenizer for the inside of a start-tag, after the element name
  // - "/>" closes the element right away (its context is popped)
  // - ">" opens the element block
  // - a name followed by "=" starts an attribute, provided it is separated
  //   from the previous token by white space or a line start
  // - anything else is an error; everything up to and including the next ">"
  //   on the line is consumed
  function parseElement(stream, state) {
    if (stream.match(/^\/>/)) {
      // self-closing tag
      popContext(state);
      state.tokenize = state.context == null ? parseDocument : parseElementBlock;
      return STYLE_ELEMENT_NAME;
    }

    if (stream.eat(">")) {
      state.tokenize = parseElementBlock;
      return STYLE_ELEMENT_NAME;
    }

    if (isTokenSeparated(stream) && stream.match(/^[a-zA-Z_:][-a-zA-Z0-9_:.]*( )*=/)) {
      // attribute
      state.tokenize = parseAttribute;
      return STYLE_ATTRIBUTE;
    }

    // no other options - this is an error
    // recover the same way the other tag errors do: carry on inside the
    // enclosing element when there is one, at document level otherwise
    state.tokenize = state.context == null ? parseDocument : parseElementBlock;
    stream.eatWhile(/[^>]/);
    stream.eat(">");
    return STYLE_ERROR;
  }

  ///////////////////////////////////////////////////////////////////////////
  // context: attribute
  //
  // attribute values may contain everything, except:
  // - the ending quote (with ' or ") - this marks the end of the value
  // - the character "<" - should never appear
  // - ampersand ("&") - unless it starts a reference: a string that ends with a semi-colon (";")
  // ---> note: this parser is lax in what may be put into a reference string,
  // ---> consult http://www.w3.org/TR/REC-xml/#NT-Reference if you want to make it tighter
  // tokenizer for the character that opens an attribute value
  // the value must open with a single or a double quote; the quote is
  // remembered in state.tokParams.quote so that parseAttributeValue knows
  // which character closes the value
  // anything else marks the rest of the line as an error
  function parseAttribute(stream, state) {
    var opener = stream.next();
    var isQuote = opener == "\"" || opener == "'";

    if (isQuote) {
      state.tokParams.quote = opener;
      state.tokenize = parseAttributeValue;
      return STYLE_WORD;
    }

    // attribute must be quoted
    stream.skipToEnd();
    state.tokenize = parseElement;
    return STYLE_ERROR;
  }

  // tokenizer for the inside of a quoted attribute value
  // - the quote stored in state.tokParams.quote ends the value
  // - "<" is never allowed
  // - "&" must start a reference: at least one character, then ";";
  //   no "<" may appear before that ";"
  // every violation marks the rest of the line as an error and hands control
  // back to parseElement; a value that reaches the end of the line without
  // being closed continues on the next line
  // @todo: find out whether this attribute value spans multiple lines,
  //        and if so, push a context for it in order not to indent it
  //        (or something of the sort..)
  function parseAttributeValue(stream, state) {
    // all the errors in here are reported the same way
    function fail() {
      stream.skipToEnd();
      state.tokenize = parseElement;
      return STYLE_ERROR;
    }

    while (!stream.eol()) {
      var current = stream.next();

      if (current == state.tokParams.quote) {
        // end quote found
        state.tokenize = parseElement;
        return STYLE_WORD;
      }
      if (current == "<") {
        // can't have less-than signs in an attribute value, ever
        return fail();
      }
      if (current != "&") {
        continue;
      }

      // reference - look for a semi-colon, or return error if none found
      current = stream.next();
      if (current == ";") {
        // the semi-colon may not come right after the ampersand
        return fail();
      }
      while (!stream.eol() && current != ";") {
        if (current == "<") {
          // no less-than signs inside a reference either
          return fail();
        }
        current = stream.next();
      }
      if (current != ";" && stream.eol()) {
        // the line ended before a semi-colon was found - error
        return fail();
      }
    }

    // attribute value continues to next line
    return STYLE_WORD;
  }

  ///////////////////////////////////////////////////////////////////////////
  // context: element block
  //
  // a block can contain:
  // - elements
  // - text
  // - CDATA sections
  // - comments
  // tokenizer for the content of an element
  // - "<?" starts a processing instruction
  // - "<!--" starts a comment, "<![CDATA[" starts a CDATA section; both are
  //   handed to inBlock and return here once they are closed
  // - "<" followed by white space or by nothing is an error
  // - any other "<" starts a tag
  // - anything else is text: a text context is pushed and parseText takes
  //   over, without consuming anything here
  // every branch returns, so nothing may follow the final else
  function parseElementBlock(stream, state) {
    if (!stream.eat("<")) {
      // new context: text
      pushContext(state, TAG_TEXT);
      state.tokenize = parseText;
      return null;
    }

    if (stream.eat("?")) {
      // new context: processing instruction
      pushContext(state, TAG_INSTRUCTION);
      state.tokenize = parseProcessingInstructionStartTag;
      return STYLE_INSTRUCTION;
    }

    if (stream.match("!--")) {
      // new context: comment
      // (a context was just pushed, so the comment can only ever return to
      // an element block - there is no document level to fall back to)
      pushContext(state, TAG_COMMENT);
      return chain(stream, state, inBlock(STYLE_COMMENT, "-->", parseElementBlock));
    }

    if (stream.match("![CDATA[")) {
      // new context: CDATA section (same reasoning as for comments)
      pushContext(state, TAG_CDATA);
      return chain(stream, state, inBlock(STYLE_TEXT, "]]>", parseElementBlock));
    }

    if (stream.eatSpace() || stream.eol()) {
      // a lone "<" is an error
      stream.skipToEnd();
      return STYLE_ERROR;
    }

    // element
    state.tokenize = parseElementTagName;
    return STYLE_ELEMENT_NAME;
  }

  // tokenizer for text inside an element block
  // consumes everything up to the next "<" on the line; when a "<" was
  // reached, the text context is popped and parseElementBlock takes over
  // when the line ended first, the text simply continues on the next line
  function parseText(stream, state) {
    stream.eatWhile(/[^<]/);

    var reachedMarkup = !stream.eol();
    if (reachedMarkup) {
      // text only ever lives inside an element block, never at the
      // document level, so that is where we return to
      popContext(state);
      state.tokenize = parseElementBlock;
    }

    return STYLE_TEXT;
  }

  ///////////////////////////////////////////////////////////////////////////
  // context: XML processing instructions
  //
  // XML processing instructions (PIs) allow documents to contain instructions for applications.
  // PI format: <?name data?>
  // - 'name' can be anything other than 'xml' (case-insensitive)
  // - 'data' can be anything which doesn't contain '?>'
  // XML declaration is a special PI (see XML declaration context below)
  // tokenizer for what follows the "<?" of a processing instruction
  // (the caller has already pushed the instruction's context)
  // - the target "xml" (case-insensitive) announces the XML declaration,
  //   which is only accepted on the first line, right at its start
  // - targets that merely begin with "xml" (such as "xml-stylesheet") are
  //   regular processing instructions
  // - white space (or a line break) right after "<?", or an immediate "?>",
  //   is an error
  // on error the rest of the line is consumed and the instruction is
  // abandoned: its context is popped and parsing resumes in whatever
  // encloses it (an element block, or the document)
  function parseProcessingInstructionStartTag(stream, state) {
    // give up on this instruction and go back to where it was found
    function reject() {
      popContext(state);
      state.tokenize = state.context == null ? parseDocument : parseElementBlock;
      stream.skipToEnd();
      return STYLE_ERROR;
    }

    // "xml" only counts when no further name character follows it
    if (stream.match(/^xml(?![-a-zA-Z0-9_:.])/i)) {
      // xml declaration: "<?xml" takes up the first five columns, so any
      // other position means it did not start the document
      if (state.lineNumber > 1 || stream.pos > 5) {
        return reject();
      }
      state.tokenize = parseDeclarationVersion;
      return STYLE_INSTRUCTION;
    }

    // regular processing instruction
    if (isTokenSeparated(stream) || stream.match("?>")) {
      // we have a space after the start-tag, or nothing but the end-tag
      // either way - error!
      return reject();
    }

    state.tokenize = parseProcessingInstructionBody;
    return STYLE_INSTRUCTION;
  }

  // tokenizer for the body of a processing instruction
  // consumes everything up to the next "?" on the line, plus that "?";
  // when a ">" follows, the instruction is over: its context is popped and
  // parsing resumes in the element block, or in the document when no
  // context is left
  function parseProcessingInstructionBody(stream, state) {
    stream.eatWhile(/[^?]/);

    var closed = stream.eat("?") && stream.eat(">");
    if (closed) {
      popContext(state);
      state.tokenize = state.context == null ? parseDocument : parseElementBlock;
    }

    return STYLE_INSTRUCTION;
  }


  ///////////////////////////////////////////////////////////////////////////
  // context: XML declaration
  //
  // XML declaration is of the following format:
  // <?xml version="1.0" encoding="UTF-8" standalone="no" ?>
  // - must start at the first character of the first line
  // - may span multiple lines
  // - must include 'version'
  // - may include 'encoding' and 'standalone' (in that order after 'version')
  // - attribute names must be lowercase
  // - cannot contain anything else on the line
  // tokenizer for the mandatory 'version' attribute of the XML declaration
  // the attribute must be separated from "<?xml" and carry a double-quoted
  // value; otherwise the rest of the line is an error
  // either way, the 'encoding' attribute is looked for next
  function parseDeclarationVersion(stream, state) {
    var versionAttribute = /^version( )*=( )*"([a-zA-Z0-9_.:]|\-)+"/;

    state.tokenize = parseDeclarationEncoding;

    if (!isTokenSeparated(stream) || !stream.match(versionAttribute)) {
      stream.skipToEnd();
      return STYLE_ERROR;
    }
    return STYLE_INSTRUCTION;
  }

  // tokenizer for the optional 'encoding' attribute of the XML declaration
  // when the attribute is not found nothing is consumed and no style is
  // returned; either way, the 'standalone' attribute is looked for next
  function parseDeclarationEncoding(stream, state) {
    var encodingAttribute = /^encoding( )*=( )*"[A-Za-z]([A-Za-z0-9._]|\-)*"/;

    state.tokenize = parseDeclarationStandalone;

    var found = isTokenSeparated(stream) && stream.match(encodingAttribute);
    return found ? STYLE_INSTRUCTION : null;
  }

  // tokenizer for the optional 'standalone' attribute of the XML declaration
  // (its value must be "yes" or "no", in double quotes)
  // when the attribute is not found nothing is consumed and no style is
  // returned; either way, the end of the declaration is expected next
  function parseDeclarationStandalone(stream, state) {
    var standaloneAttribute = /^standalone( )*=( )*"(yes|no)"/;

    state.tokenize = parseDeclarationEndTag;

    var found = isTokenSeparated(stream) && stream.match(standaloneAttribute);
    return found ? STYLE_INSTRUCTION : null;
  }

  // tokenizer for the end of the XML declaration
  // the declaration must end with "?>", with nothing else after it on the
  // line; anything else marks the rest of the line as an error
  // either way the declaration is over at this point, so its context is
  // popped and the document level takes over - leaving the context behind
  // on the error path would make everything after a malformed declaration
  // look as if it were nested inside it
  function parseDeclarationEndTag(stream, state) {
    popContext(state);
    state.tokenize = parseDocument;

    if (stream.match("?>") && stream.eol()) {
      return STYLE_INSTRUCTION;
    }
    stream.skipToEnd();
    return STYLE_ERROR;
  }

  ///////////////////////////////////////////////////////////////////////////
  // returned object
  return {
    electricChars:"/",

    startState:function () {
      return {
        tokenize:parseDocument,
        tokParams:{},
        lineNumber:0,
        lineError:false,
        context:null,
        indented:0
      };
    },

    token:function (stream, state) {
      if (stream.sol()) {
        // initialize a new line
        state.lineNumber++;
        state.lineError = false;
        state.indented = stream.indentation();
      }

      // eat all (the spaces) you can
      if (stream.eatSpace()) return null;

      // run the current tokenize function, according to the state
      var style = state.tokenize(stream, state);

      // is there an error somewhere in the line?
      state.lineError = (state.lineError || style == "error");

      return style;
    },

    blankLine:function (state) {
      // blank lines are lines too!
      state.lineNumber++;
      state.lineError = false;
    },

    indent:function (state, textAfter) {
      if (state.context) {
        if (state.context.noIndent == true) {
          // do not indent - no return value at all
          return;
        }
        if (textAfter.match(/^<\/.*/)) {
          // eng-tag - indent back to last context
          return state.context.indent;
        }
        // indent to last context + regular indent unit
        return state.context.indent + indentUnit;
      }
      return 0;
    },

    compareStates:function (a, b) {
      if (a.indented != b.indented) return false;
      for (var ca = a.context, cb = b.context; ; ca = ca.prev, cb = cb.prev) {
        if (!ca || !cb) return ca == cb;
        if (ca.tagName != cb.tagName) return false;
      }
    }
  };
});

// map the XML MIME types to this mode; the second argument must be the name
// the mode was registered under with defineMode ("xmlpure"), otherwise the
// MIME types resolve to a mode that does not exist
CodeMirror.defineMIME("application/xml", "xmlpure");
CodeMirror.defineMIME("text/xml", "xmlpure");
